/* 
    Purpose: Using the 1980 Census, this file takes cleaned variables 
             from 5c and calculates average predicted mother income 
             (i.e., "income scores") at ONLY the occupation x race x south
             level.

    Note: Will create templates/income scores at occ and occ x race levels 
          solely to impute income for missing occ x race x south cells.

    Creates: incomescores_mothers1980_byocc_byr_bys.dta
*/
* Start from an empty dataset and disable the more-prompt so the script runs without pausing.
clear
set more off
* Every relative path used below (./code/... and ./output/...) resolves against this folder.
* NOTE(review): the global macro Mydirectory1 is not defined in this file; it has to be
* set by the caller (e.g. a master do-file) before this script runs.
cd "$Mydirectory1/1_DataSources/CensusData/"
    
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

*******************
*** TEMPLATES
*******************

/* Note: Census microdata from 5c does not have 
         all 28 coarsened occupations, so will use 
         the template below that does have all occupations. 
*/    
    use ./code/OtherCensus_RawData/motheroccej_template_occs.dta, clear

* Reserve all three temporary files up front. Later sections look the templates
* up through these exact macro names, so the names must not change.
tempfile template_byocc template_byocc_byr template_byocc_byr_bys

* Template 1: occupation (the occupation list exactly as loaded)
save `template_byocc'

* Templates 2 and 3 are built cumulatively inside a single preserve/restore,
* so the occupation-only data is back in memory once this section finishes.
preserve

    * Template 2: occupation x race.
    * expand marks the duplicated rows with race=1 (originals get 0);
    * adding 1 recodes the pair to 1 and 2.
    expand 2, generate(race)
    replace race = race + 1
    save `template_byocc_byr'

    * Template 3: occupation x race x south.
    * south_merge keeps the 0 (original) / 1 (duplicate) coding from expand.
    expand 2, generate(south_merge)
    save `template_byocc_byr_bys'

restore

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

    use ./output/Census1980_mothers_ages30to50.dta, clear
    generate number = 1

    * Income variables that get an income score; also read by later sections.
    global income_measures "inctot faminc HHinc"

*******************
*** COLLAPSE
*******************

* Grouping variables that define a cell at each aggregation level.
local cell_byocc         "motheroccej"
local cell_byocc_byr     "motheroccej race"
local cell_byocc_byr_bys "motheroccej race south_merge"

foreach lvl in byocc byocc_byr byocc_byr_bys {

    local cell "`cell_`lvl''"

    preserve

        * Cell size as a raw (unweighted) sum of the 1s, next to
        * perwt-weighted mean incomes.
        collapse (rawsum) number (mean) $income_measures [aw=perwt], by(`cell')

        * Tag each mean with the year and the aggregation level.
        foreach m of global income_measures {
            rename `m' avg_`m'_1980_`lvl'
            label variable avg_`m'_1980_`lvl' "Coarse (mother) income score, average, 1980 using `m'"
        }

        tempfile cellmeans
        save `cellmeans'

        * Attach the means to the full template so that cells with no
        * observations in the microdata are still present (with count 0).
        use `template_`lvl''
        merge 1:1 `cell' using `cellmeans', nogenerate

        replace number = 0 if missing(number)
        rename number number_1980obs_`lvl'
        label variable number_1980obs_`lvl' "Number of obs in cell"

        * Keep one result file per level for the imputation step.
        tempfile incomescores_`lvl'
        save `incomescores_`lvl''

    restore

}

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
*** IMPUTATIONS
****************

/* Note: Any occ x race x south cell with a missing income score is filled from the
         next coarser level: occ x race first, then occ alone if that is also missing. */

    * Aggregation levels, ordered from coarsest (1) to finest (3).
    local name1 "byocc"
    local name2 "byocc_byr"
    local name3 "byocc_byr_bys"

* Put all three levels side by side. The finest file is the master, so each
* occ x race x south row picks up its occ x race and its occ averages.
    use `incomescores_`name3'', clear
    merge m:1 motheroccej race using `incomescores_`name2'', nogenerate
    merge m:1 motheroccej using `incomescores_`name1'', nogenerate

* Fill gaps from coarse to fine: level 2 (occ x race) borrows from level 1 (occ),
* then level 3 (occ x race x south) borrows from the already-filled level 2.
* count reports how many cells need filling before each replace.
    forvalues k = 2/3 {
        local j = `k' - 1
        local fine   "`name`k''"
        local coarse "`name`j''"

        foreach m of global income_measures {
            count if avg_`m'_1980_`fine' == .
            replace avg_`m'_1980_`fine' = avg_`m'_1980_`coarse' if avg_`m'_1980_`fine' == .
        }
    }
 
***********
* SAVE
***********

    * Write the final income-score file. Only the occ x race x south level is
    * saved; the loop form is kept so that more levels could be listed.
    foreach x in byocc_byr_bys {
    
    if "`x'"=="byocc_byr_bys" local cell "motheroccej race south_merge"

    preserve 
    
    * Keep one row per cell, then only the cell identifiers, the cell size,
    * and the income scores computed at this level.
    bysort `cell': keep if _n==1
    keep number_1980obs_`x' `cell' avg*`x'
    
    /* Note: After imputation no income score should be missing at this level.
             Check every income measure, not only inctot, so that a missing
             faminc or HHinc score stops the run instead of being written out
             silently. */
    
    foreach c of global income_measures {
        assert !missing(avg_`c'_1980_`x')
    }
    
    compress
    save ./output/incomescores_mothers1980_`x'.dta, replace
    
    restore
    
    }
